# !pip install -U matplotlib
# !pip install pandas
# !pip install numpy
# !pip install seaborn
# !pip install plotly
# !pip install django
# !pip install streamlit
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
# import streamlit as st
%matplotlib inline
# Read both CSV files from the working directory: `ipl` from 'ipl.csv' and
# the ball-by-ball table `ipl_ball` from './iplball.csv'.
ipl, ipl_ball = (
    pd.read_csv(csv_path) for csv_path in ('ipl.csv', './iplball.csv')
)

# Preview the first rows of the ball-by-ball table.
ipl_ball.head()
| id | inning | over | ball | batsman | non_striker | bowler | batsman_runs | extra_runs | total_runs | non_boundary | is_wicket | dismissal_kind | player_dismissed | fielder | extras_type | batting_team | bowling_team | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 335982 | 1 | 6 | 5 | RT Ponting | BB McCullum | AA Noffke | 1 | 0 | 1 | 0 | 0 | NaN | NaN | NaN | NaN | Kolkata Knight Riders | Royal Challengers Bangalore |
| 1 | 335982 | 1 | 6 | 6 | BB McCullum | RT Ponting | AA Noffke | 1 | 0 | 1 | 0 | 0 | NaN | NaN | NaN | NaN | Kolkata Knight Riders | Royal Challengers Bangalore |
| 2 | 335982 | 1 | 7 | 1 | BB McCullum | RT Ponting | Z Khan | 0 | 0 | 0 | 0 | 0 | NaN | NaN | NaN | NaN | Kolkata Knight Riders | Royal Challengers Bangalore |
| 3 | 335982 | 1 | 7 | 2 | BB McCullum | RT Ponting | Z Khan | 1 | 0 | 1 | 0 | 0 | NaN | NaN | NaN | NaN | Kolkata Knight Riders | Royal Challengers Bangalore |
| 4 | 335982 | 1 | 7 | 3 | RT Ponting | BB McCullum | Z Khan | 1 | 0 | 1 | 0 | 0 | NaN | NaN | NaN | NaN | Kolkata Knight Riders | Royal Challengers Bangalore |
# Career batting totals, one row per batsman.  Each frame keeps the raw column
# name of the value it holds ('batsman_runs' or 'id') because the later merge
# steps rely on those names when they rename the columns.

# Total runs off the bat, highest scorer first.
most_runs = (
    ipl_ball.groupby('batsman')[['batsman_runs']]
    .sum()
    .reset_index()
    .sort_values('batsman_runs', ascending=False)
)

# Number of distinct match ids in which the player appears as `batsman`;
# the count stays in the 'id' column.
matches = (
    ipl_ball.groupby('batsman')[['id']]
    .nunique()
    .sort_values('id', ascending=False)
    .reset_index()
)

# Number of rows on which the player is the `batsman`; the count stays in the
# 'batsman_runs' column.
# NOTE(review): every row is counted regardless of `extras_type`; if wides are
# stored as rows they inflate balls faced -- confirm against the dataset.
balls_played = (
    ipl_ball.groupby('batsman')[['batsman_runs']]
    .count()
    .sort_values('batsman_runs', ascending=False)
    .reset_index()
)
# Count the deliveries on which each batsman scored exactly 4 or exactly 6.
# NOTE(review): the data has a `non_boundary` column that is not consulted
# here -- confirm whether rows flagged by it should be excluded.
bound_4 = ipl_ball[ipl_ball.batsman_runs == 4]
bound_6 = ipl_ball[ipl_ball.batsman_runs == 6]

b4 = (
    bound_4.groupby('batsman')[['batsman_runs']]
    .count()
    .reset_index()
    .rename(columns={'batsman_runs': 'Fours'})
)
b6 = (
    bound_6.groupby('batsman')[['batsman_runs']]
    .count()
    .reset_index()
    .rename(columns={'batsman_runs': 'Sixes'})
)

# Outer join: an inner join silently dropped every batsman who hit fours but
# never a six (or the reverse), so their real count was later reported as 0.
# The side that is missing is a genuine zero, so fill it and keep integers.
boundaries = pd.merge(b4, b6, how='outer', on='batsman').fillna(0)
boundaries[['Fours', 'Sixes']] = boundaries[['Fours', 'Sixes']].astype(int)
# Runs scored by each batsman in each match (`id`) against each bowling team.
match_runs = (
    ipl_ball.groupby(['batsman', 'id', 'bowling_team'])[['batsman_runs']]
    .sum()
    .reset_index()
)

# Scores of 50-99 count as fifties, 100 and above as hundreds.
fifties = match_runs[(match_runs.batsman_runs > 49) & (match_runs.batsman_runs < 100)]
fifty = fifties.groupby('batsman')[['batsman_runs']].count().reset_index()

Hundreds = match_runs[match_runs.batsman_runs > 99]
Hundred = Hundreds.groupby('batsman')[['batsman_runs']].count().reset_index()

# Outer join: a left join anchored on `fifty` dropped any batsman who has a
# hundred but no fifty.  The count columns keep the '_x' (fifties) and '_y'
# (hundreds) suffixes because the summary merge renames them from those names.
centuries = pd.merge(fifty, Hundred, how='outer', on='batsman', suffixes=('_x', '_y'))

# Best single-match score per batsman.
highest_runs = match_runs.groupby('batsman')[['batsman_runs']].max().reset_index()
# Batting average = runs scored / number of times dismissed.
wicket_balls = ipl_ball[ipl_ball.is_wicket == 1]

# Attribute each wicket to `player_dismissed`, not to the striker: grouping by
# `batsman` charges a wicket that falls to the non-striker to the wrong player.
# The key is renamed to 'batsman' so it joins with the runs table.
outs = (
    wicket_balls.groupby('player_dismissed')[['is_wicket']]
    .sum()
    .reset_index()
    .rename(columns={'player_dismissed': 'batsman'})
)

# Inner join: players with no recorded dismissal have no defined average and
# are left out of `avg`.
avg = pd.merge(most_runs, outs, how='inner', on='batsman')
avg['Average'] = avg.batsman_runs / avg.is_wicket
avg = avg.drop(columns=['batsman_runs', 'is_wicket'])
# Overall strike rate = runs per 100 deliveries faced, one row per batsman.
# Both inputs carry their value in a 'batsman_runs' column, so the join
# suffixes them: '_x' is runs scored, '_y' is balls faced.
runs_and_balls = most_runs.merge(balls_played, how='inner', on='batsman')
sr = pd.DataFrame({
    'batsman': runs_and_balls['batsman'],
    'Strike_rate': (
        runs_and_balls['batsman_runs_x'] * 100 / runs_and_balls['batsman_runs_y']
    ),
})
def _phase_strike_rate(deliveries, column):
    """Return the per-batsman strike rate over a subset of deliveries.

    Parameters
    ----------
    deliveries : pandas.DataFrame
        Ball-by-ball rows; the 'batsman' and 'batsman_runs' columns are read.
    column : str
        Name given to the strike-rate column of the result.

    Returns
    -------
    pandas.DataFrame
        Columns 'batsman' and *column*, where the value is
        ``runs * 100 / rows faced``.  Rows are ordered by runs scored within
        *deliveries*, highest first.
    """
    totals = deliveries.groupby('batsman')['batsman_runs'].agg(['sum', 'count'])
    totals = totals.sort_values('sum', ascending=False)
    rate = totals['sum'] * 100 / totals['count']
    return rate.rename(column).reset_index()


# Split the innings into three phases on the `over` column.  The last phase
# takes every over after 14 so that each delivery belongs to exactly one phase;
# it previously reused the `over < 7` filter, which made the 16-20 column an
# exact copy of the powerplay column.
# NOTE(review): the bounds assume the numbering the column labels imply; if
# `over` is 0-based every phase is shifted by one over -- confirm.
ipl_ball6 = ipl_ball[ipl_ball.over < 7]
ipl_ball714 = ipl_ball[(ipl_ball.over > 6) & (ipl_ball.over < 15)]
ipl_ball20 = ipl_ball[ipl_ball.over > 14]

sr6 = _phase_strike_rate(ipl_ball6, 'Strike_rate_6_Overs')
sr714 = _phase_strike_rate(ipl_ball714, 'Strike_rate_7-14_Overs')
sr20 = _phase_strike_rate(ipl_ball20, 'Strike_rate_16-20_Overs')

# Left joins anchored on the overall table: with inner joins a batsman who
# never batted in one phase disappeared entirely and lost the overall strike
# rate as well.  A phase with no deliveries is left as NaN here.
srm = pd.merge(sr, sr6, how='left', on='batsman')
srm1 = pd.merge(srm, sr714, how='left', on='batsman')
srm2 = pd.merge(srm1, sr20, how='left', on='batsman')
# Build the per-batsman summary by joining each statistic onto the runs table.
# Every join after the first is a left join, so the row order of `most_runs`
# is kept all the way through to `merged7`.

# Runs and balls both arrive in a 'batsman_runs' column and are told apart by
# the '_x' / '_y' merge suffixes.
merged1 = most_runs.merge(balls_played, how='inner', on='batsman')
merged2 = merged1.merge(matches, how='left', on='batsman').rename(
    columns={
        'batsman_runs_x': 'Runs_scored',
        'id': 'Matches_played',
        'batsman_runs_y': 'Balls_Faced',
    }
)
t10batsman = merged2.head(10)

merged3 = merged2.merge(boundaries, how='left', on='batsman')

# `centuries` carries the fifties count as '_x' and the hundreds count as '_y'.
merged4 = merged3.merge(centuries, how='left', on='batsman').rename(
    columns={'batsman_runs_x': 'Fifties', 'batsman_runs_y': 'Hundreds'}
)
merged5 = merged4.merge(highest_runs, how='left', on='batsman').rename(
    columns={'batsman_runs': 'Highest_score'}
)
merged6 = merged5.merge(avg, how='left', on='batsman')

# Statistics missing for a batsman after the left joins are reported as 0.
merged7 = merged6.merge(srm2, how='left', on='batsman').fillna(0)
merged7
| batsman | Runs_scored | Balls_Faced | Matches_played | Fours | Sixes | Fifties | Hundreds | Highest_score | Average | Strike_rate | Strike_rate_6_Overs | Strike_rate_7-14_Overs | Strike_rate_16-20_Overs | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | V Kohli | 5878 | 4609 | 184 | 504.0 | 202.0 | 39.0 | 5.0 | 113 | 36.061350 | 127.533087 | 108.986301 | 125.490196 | 108.986301 |
| 1 | SK Raina | 5368 | 4041 | 189 | 493.0 | 194.0 | 38.0 | 1.0 | 100 | 33.550000 | 132.838406 | 124.242424 | 130.294118 | 124.242424 |
| 2 | DA Warner | 5254 | 3819 | 142 | 510.0 | 195.0 | 48.0 | 4.0 | 126 | 41.698413 | 137.575281 | 130.428769 | 138.564027 | 130.428769 |
| 3 | RG Sharma | 5230 | 4088 | 194 | 458.0 | 214.0 | 39.0 | 1.0 | 109 | 29.548023 | 127.935421 | 109.600000 | 119.159580 | 109.600000 |
| 4 | S Dhawan | 5197 | 4208 | 175 | 591.0 | 109.0 | 41.0 | 2.0 | 106 | 34.190789 | 123.502852 | 118.851133 | 124.695777 | 118.851133 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 532 | IC Pandey | 0 | 2 | 1 | 0.0 | 0.0 | 0.0 | 0.0 | 0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 533 | S Kaushik | 0 | 1 | 1 | 0.0 | 0.0 | 0.0 | 0.0 | 0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 534 | ND Doshi | 0 | 13 | 1 | 0.0 | 0.0 | 0.0 | 0.0 | 0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 535 | V Pratap Singh | 0 | 1 | 1 | 0.0 | 0.0 | 0.0 | 0.0 | 0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 536 | Abdur Razzak | 0 | 2 | 1 | 0.0 | 0.0 | 0.0 | 0.0 | 0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
537 rows × 14 columns
import plotly.express as px

# Scatter of runs scored against balls faced (log-scaled x axis), one labelled
# point per batsman, coloured by the number of matches played.
df = merged3
scatter_options = {
    'x': "Balls_Faced",
    'y': "Runs_scored",
    'text': "batsman",
    'log_x': True,
    'size_max': 100,
    'color': "Matches_played",
}
fig = px.scatter(df, **scatter_options)

# Put each name above its point and centre the chart title.  The chained call
# is left as the final expression so the figure is still the displayed value.
fig.update_traces(textposition='top center').update_layout(
    title_text='Batting stats', title_x=0.5
)
# st.plotly_chart(fig)
# !streamlit run D:\Installed Programs\lib\site-packages\ipykernel_launcher.py
bat = merged7
upto = 20
category_names = ['Fifties', 'Hundreds']

# First `upto` rows of the summary table, ordered by fifties, then hundreds.
t = bat[['batsman', 'Hundreds', 'Fifties']].iloc[:upto].sort_values(['Fifties', 'Hundreds'])

# Map each batsman to [Fifties, Hundreds], the per-category list layout that
# survey() expects.  Keys are in alphabetical order, which is the order the
# previous melt/pivot round trip produced.
# NOTE(review): that round trip discarded the sort on `t`; if the bars were
# meant to be ordered by fifties, iterate over `t` directly instead.
# A comprehension is used so the name of the `dict` builtin is never rebound.
results = {
    row.batsman: [row.Fifties, row.Hundreds]
    for row in t.sort_values('batsman').itertuples(index=False)
}
def survey(results, category_names, title=None):
    """Draw a horizontal stacked bar chart with one bar per key of *results*.

    Parameters
    ----------
    results : dict
        A mapping from bar labels to a list of values per category.
        It is assumed all lists contain the same number of entries and that
        it matches the length of *category_names*.
    category_names : list of str
        The category labels, used for the legend and the default title.
    title : str, optional
        Axes title.  When omitted it is built from the category names joined
        by ' vs ' and the number of bars, e.g.
        'Fifties vs Hundreds for top 20 run scorers'.

    Returns
    -------
    matplotlib.figure.Figure
        The figure holding the chart.

    Raises
    ------
    ValueError
        If *results* is empty.
    """
    if not results:
        raise ValueError('results must contain at least one entry')

    labels = list(results)
    data = np.array(list(results.values()))
    # Running total across categories gives the right edge of every segment.
    data_cum = data.cumsum(axis=1)
    category_colors = plt.get_cmap('RdYlGn')(
        np.linspace(0.15, 0.85, data.shape[1]))

    fig, ax = plt.subplots(figsize=(14, 8))
    ax.invert_yaxis()  # first label at the top
    ax.xaxis.set_visible(False)
    ax.set_xlim(0, np.sum(data, axis=1).max())

    for i, (colname, color) in enumerate(zip(category_names, category_colors)):
        widths = data[:, i]
        # A segment starts where the previous categories' total ends.
        starts = data_cum[:, i] - widths
        ax.barh(labels, widths, left=starts, height=0.5,
                label=colname, color=color)

    ax.legend(ncol=len(category_names), bbox_to_anchor=(0, 1),
              loc='lower left', fontsize='small')

    if title is None:
        title = f"{' vs '.join(category_names)} for top {len(labels)} run scorers"
    ax.set_title(title)
    return fig
# Draw the horizontal stacked chart from the current `results` mapping.
figure = survey(results, category_names)

# First `upto` rows of the summary, most fifties (then hundreds) first.
t = bat[['batsman', 'Hundreds', 'Fifties']].iloc[:upto].sort_values(
    ['Fifties', 'Hundreds'], ascending=False)

# Long-format frame: one row per (batsman, milestone type).  rename/assign
# return new frames; writing a column into a slice of `t` instead raises
# SettingWithCopyWarning.
h = t[['batsman', 'Hundreds']].rename(columns={'Hundreds': 'Value'}).assign(type='Hundred')
f = t[['batsman', 'Fifties']].rename(columns={'Fifties': 'Value'}).assign(type='Fifties')
ghy = pd.concat([h, f]).sort_values('Value', ascending=False)

# Vertical stacked bars: hundreds at the base, fifties stacked on top.
fig = plt.figure()
ax = fig.add_axes([0, 0, 1, 1])
ax.bar(t.batsman, t.Hundreds, color='r', label='Hundreds')
ax.bar(t.batsman, t.Fifties, bottom=t.Hundreds, color='b', label='Fifties')
ax.set_ylabel('Total')
plt.xticks(rotation=90)
ax.legend()

# plt.show() renders under the inline backend as well; Figure.show() only
# emits a "non-GUI backend ... cannot show the figure" UserWarning there.
plt.show()
<ipython-input-32-14319a5083ad>:10: UserWarning: Matplotlib is currently using module://ipykernel.pylab.backend_inline, which is a non-GUI backend, so cannot show the figure.
# Histogram of `Value` per batsman, split by `type`, with a rug plot in the
# margin; hovering shows every column of `ghy`.
histogram_options = {
    'x': "batsman",
    'y': "Value",
    'color': "type",
    'marginal': "rug",
    'hover_data': ghy.columns,
}
fig = px.histogram(ghy, **histogram_options)

# Streamlit alternative: st.plotly_chart(fig, use_container_width=True)
fig.show()
upto = 20


def bar_data(bat, i, j, upto=20):
    """Reshape two statistic columns into long format for a grouped chart.

    Parameters
    ----------
    bat : pandas.DataFrame
        Table with a 'batsman' column and the columns named by *i* and *j*.
    i, j : str
        Names of the two statistic columns to compare.
    upto : int, optional
        Number of leading rows of *bat* to use (default 20, the value that
        was previously hard-coded).

    Returns
    -------
    pandas.DataFrame
        Columns 'batsman', 'Value' and 'type', where 'type' holds the name of
        the column the value came from.  Rows are sorted by 'Value', largest
        first, and keep their index labels from *bat*.  *bat* itself is not
        modified.
    """
    t = bat[['batsman', i, j]].iloc[:upto].sort_values([i, j], ascending=False)
    # rename/assign return new frames; writing a column into a slice of `t`
    # instead raises SettingWithCopyWarning.
    parts = [
        t[['batsman', col]].rename(columns={col: 'Value'}).assign(type=col)
        for col in (i, j)
    ]
    return pd.concat(parts).sort_values('Value', ascending=False)
# Long-format Fours/Sixes table.
ghy = bar_data(bat, 'Fours', 'Sixes')

# NOTE(review): the chart below is drawn from `t` (the Fifties column), not
# from the `ghy` frame built on the previous line -- confirm which is intended.
bar_options = {
    'x': "batsman",
    'y': "Fifties",
    'hover_data': t.columns,
}
fig = px.bar(t, **bar_options)

# Streamlit alternative: st.plotly_chart(fig, use_container_width=True)
fig
category_names = ['Fours', 'Sixes']

# First `upto` rows of the summary table, ordered by fours, then sixes.
t1 = bat[['batsman', 'Fours', 'Sixes']].iloc[:upto].sort_values(['Fours', 'Sixes'])

# Map each batsman to [Fours, Sixes], the per-category list layout that
# survey() expects.  Keys are in alphabetical order, which is the order the
# previous melt/pivot round trip produced.
# NOTE(review): that round trip discarded the sort on `t1`; if the bars were
# meant to be ordered by fours, iterate over `t1` directly instead.
# A comprehension is used so the name of the `dict` builtin is never rebound.
results = {
    row.batsman: [row.Fours, row.Sixes]
    for row in t1.sort_values('batsman').itertuples(index=False)
}
def survey(results, category_names, title=None):
    """Draw a horizontal stacked bar chart with one bar per key of *results*.

    Parameters
    ----------
    results : dict
        A mapping from bar labels to a list of values per category.
        It is assumed all lists contain the same number of entries and that
        it matches the length of *category_names*.
    category_names : list of str
        The category labels, used for the legend and the default title.
    title : str, optional
        Axes title.  When omitted it is built from the category names joined
        by ' vs ' and the number of bars, e.g.
        'Fours vs Sixes for top 20 run scorers'.

    Returns
    -------
    tuple
        ``(fig, ax)``: the matplotlib figure and the axes the bars are on.

    Raises
    ------
    ValueError
        If *results* is empty.
    """
    if not results:
        raise ValueError('results must contain at least one entry')

    labels = list(results)
    data = np.array(list(results.values()))
    # Running total across categories gives the right edge of every segment.
    data_cum = data.cumsum(axis=1)
    category_colors = plt.get_cmap('RdYlGn')(
        np.linspace(0.15, 0.85, data.shape[1]))

    fig, ax = plt.subplots(figsize=(14, 8))
    ax.invert_yaxis()  # first label at the top
    ax.xaxis.set_visible(False)
    ax.set_xlim(0, np.sum(data, axis=1).max())

    for i, (colname, color) in enumerate(zip(category_names, category_colors)):
        widths = data[:, i]
        # A segment starts where the previous categories' total ends.
        starts = data_cum[:, i] - widths
        ax.barh(labels, widths, left=starts, height=0.5,
                label=colname, color=color)

    ax.legend(ncol=len(category_names), bbox_to_anchor=(0, 1),
              loc='lower left', fontsize='small')

    if title is None:
        title = f"{' vs '.join(category_names)} for top {len(labels)} run scorers"
    ax.set_title(title)
    return fig, ax
# Horizontal stacked chart for the current `results` mapping.
survey(results, category_names)
plt.show()

# Pairwise scatter/histogram grid of the four counting statistics.  pairplot
# creates its own figure, so no plt.figure() call precedes it: that call only
# left an empty "Figure ... with 0 Axes" behind.
sns.pairplot(bat[['Fours', 'Sixes', 'Hundreds', 'Fifties']])
plt.show()
<Figure size 432x288 with 0 Axes>
# For each of the 20 highest run scorers, find the bowling team against which
# they have scored the most runs.

# Career runs per batsman, highest first.
df = (
    ipl_ball.groupby("batsman")[['batsman_runs']]
    .sum()
    .sort_values('batsman_runs', ascending=False)
    .reset_index()
)

# Names of the first 20 batsmen, then only the deliveries they faced.
df1 = df['batsman'].iloc[:20].tolist()
df2 = ipl_ball[ipl_ball['batsman'].isin(df1)]

# Runs per (batsman, opponent), and each batsman's best total against any
# single opponent.
df3 = df2.groupby(['batsman', 'bowling_team'], as_index=False)[['batsman_runs']].sum()
df4 = df3.groupby('batsman', as_index=False)[['batsman_runs']].max()

# Joining the best total back onto the per-opponent table recovers the team
# name; a batsman whose best total is tied across teams gets one row per team.
merge = df4.merge(df3, how='left', on=['batsman', 'batsman_runs']).sort_values(
    ['batsman_runs'], ascending=False
)
merge
| batsman | batsman_runs | bowling_team | |
|---|---|---|---|
| 12 | RG Sharma | 939 | Kolkata Knight Riders |
| 5 | DA Warner | 912 | Kolkata Knight Riders |
| 18 | V Kohli | 887 | Chennai Super Kings |
| 10 | MS Dhoni | 823 | Royal Challengers Bangalore |
| 16 | SK Raina | 818 | Mumbai Indians |
| 15 | SK Raina | 818 | Kolkata Knight Riders |
| 4 | CH Gayle | 797 | Kings XI Punjab |
| 14 | S Dhawan | 777 | Chennai Super Kings |
| 13 | RV Uthappa | 756 | Kings XI Punjab |
| 6 | G Gambhir | 730 | Kings XI Punjab |
| 0 | AB de Villiers | 726 | Mumbai Indians |
| 1 | AM Rahane | 677 | Delhi Daredevils |
| 2 | AT Rayudu | 658 | Royal Challengers Bangalore |
| 11 | PA Patel | 583 | Kings XI Punjab |
| 9 | MK Pandey | 578 | Mumbai Indians |
| 3 | BB McCullum | 569 | Royal Challengers Bangalore |
| 17 | SR Watson | 566 | Sunrisers Hyderabad |
| 8 | KD Karthik | 557 | Kings XI Punjab |
| 7 | KA Pollard | 537 | Royal Challengers Bangalore |
| 19 | YK Pathan | 472 | Mumbai Indians |
| 20 | Yuvraj Singh | 459 | Delhi Daredevils |